#clean up
rm(list=ls())

#load libraries
library(akima)
library(geoR)
library(scatterplot3d)  
library(ggmap)
library(lattice)
library(foreign)
library(maps)
library(maptools)
library(spdep)
library(rgdal)
library(prevR)
library(car)
library(xtable)
library(rgdal)

#options
options(scipen=8)

#Set to working directory. Adjust the command below to reflect YOUR working directory.
#setwd('/Volumes/MONOGAN/gillAreal/ccesCode/')

###Load ZIP code locations from Tom Tom data###
#Points accessed on September 2, 2015 from http://www.arcgis.com/home/item.html?id=1eeaf4bb41314febb990e2e96f7178df
#Polygons accessed on September 2, 2015 from https://www.arcgis.com/home/item.html?id=8d2012a2016e484dafaac0451f9aea24
zip.polys.0<-readShapePoly(fn="Zipcode_Polys/Zipcodes_Polys.shp",proj4string=CRS("+proj=longlat +datum=NAD83"))
#zip.polys.1<-zip.polys.0[!zip.polys.0$STATE%in%c("AK","GU","HI","PR","VI"),]
zip.polys.1<-zip.polys.0[!zip.polys.0$STATE%in%c("GU","PR","VI"),]
#plot(zip.polys.0.a)
zip.longlat<-coordinates(zip.polys.1)
zip.polys.2<-spTransform(zip.polys.1,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
zip.polys.3<-as(zip.polys.2,'SpatialPolygons')

#save area, radius, zip code names, and easting/northing centroids
zip.area<-zip.polys.2$SQMI*2.58999
zip.radius<-sqrt(zip.area/pi)
zip.number<-as.numeric(as.character(zip.polys.2$ZIP_CODE))
zip.cent<-coordinates(zip.polys.2)
zip.state.fips<-zip.polys.2$STATE
zip.state<-as.character(recode(zip.state.fips,'1="AL";2="AK";4="AZ";5="AR";6="CA";8="CO";9="CT";10="DE";11="DC";12="FL";13="GA";15="HI";16="ID";17="IL";18="IN";19="IA";20="KS";21="KY";22="LA";23="ME";24="MD";25="MA";26="MI";27="MN";28="MS";29="MO";30="MT";31="NE";32="NV";33="NH";34="NJ";35="NM";36="NY";37="NC";38="ND";39="OH";40="OK";41="OR";42="PA";44="RI";45="SC";46="SD";47="TN";48="TX";49="UT";50="VT";51="VA";53="WA";54="WV";55="WI";56="WY"'))
zip<-as.data.frame(cbind(zip.number,zip.longlat,zip.cent,zip.radius,zip.area))
colnames(zip)<-c('zip','longitude','latitude','eastings','northings','radius','area')
zip$state<-zip.state
write.csv(zip,"zipCentroidsRadii.csv",row.names=F)
#head(zip)
#plot(x=zip[,'eastings'],y=zip[,'northings'],pch='.')

###MOVE ALASKA AND HAWAII###
#zip$eastings[zip$state=="HI"]<-zip$eastings[zip$state=="HI"]-(-5866.128 + 2210.669)
#zip$northings[zip$state=="HI"]<-zip$northings[zip$state=="HI"]-(85.16965-1233.83522) 
#zip$eastings[zip$state=="AK"]<-zip$eastings[zip$state=="AK"]-(-2623.807+ 2637.116)-30
#zip$northings[zip$state=="AK"]<-zip$northings[zip$state=="AK"]-(3201.53325-133.52946)
##plot(y=zip$northings,x=zip$eastings,pch='.')

###USDA urban-rural continuum codes by county###
#Accessed 15 October 2015 from http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UfCK11OE4xc
usda.0<-read.csv('ruralurbancodes2013.csv')
#subtract 1 to have a 0 origin
usda.0$rural<-usda.0$RUCC_2013-1
usda.0$FIPS.Code<-usda.0$FIPS
usda<-subset(usda.0, select=c(FIPS.Code,State,County_Name,rural))

#load CCES data
cces.0<-read.dta('cces08_common_output.dta',convert.factors=FALSE)
cces.1<-cces.0[!is.na(cces.0$v253),]
cces.1$FIPS.Code<-(1000*as.numeric(cces.1$v265))+cces.1$v270
cces.2<-subset(cces.1,select=c(v100,v200,v202,v203,v204,v206,v253,v256,v257,v258,v259,v263,v264,v265,v268,v270,v207,v208,v211,v212,v213,cc307,cc307a,cc310,cc311,cc312,cc313,cc317a,cc423,cc424,v209,cc333,v246,v219,v220,v221,v222,v223,v225,v226,v232,FIPS.Code))
cces.2$zip<-sprintf("%05d",cces.2$v253)

#merge cces with USDA urban/rural data
cces<-merge(y=cces.2,x=usda,by='FIPS.Code',all.y=T,all.x=F)

#make CCES ZIP codes numeric
cces$zip<-as.numeric(cces$zip)

#merge ZIP centroids with post-study zip code (v253)
combined.0<-merge(x=zip, y=cces, by.x='zip', by.y='zip')

#two-letter state abbreviations from CCES for later linking
combined.0$state.2<-recode(combined.0$v259,'1="AL";2="AK";4="AZ";5="AR";6="CA";8="CO";9="CT";10="DE";11="DC";12="FL";13="GA";15="HI";16="ID";17="IL";18="IN";19="IA";20="KS";21="KY";22="LA";23="ME";24="MD";25="MA";26="MI";27="MN";28="MS";29="MO";30="MT";31="NE";32="NV";33="NH";34="NJ";35="NM";36="NY";37="NC";38="ND";39="OH";40="OK";41="OR";42="PA";44="RI";45="SC";46="SD";47="TN";48="TX";49="UT";50="VT";51="VA";53="WA";54="WV";55="WI";56="WY"')
#length(table(combined.0$state.2))
#length(table(cces$v259))
#length(table(zip$state))
#summary(zip$zip)
#as.numeric(cces$zip[cces$v259==9])%in%zip$zip[zip$state=="CT"]
#(cces$zip[cces$v259==9])%in%zip$zip[zip$state=="CT"]

#clean ideology, age, and sex
combined.0$ideology<-combined.0$cc317a
combined.0$age<-2008-combined.0$v207
combined.0$female<-as.numeric(combined.0$v208==2)

#education: 6 point ordinal scale, subtract 1 so "no hs" is 0
combined.0$v213[combined.0$v213>6]<-NA
combined.0$educ<-combined.0$v213-1

#race, 1=white, 2=black, recode so 3="other"
combined.0$v211[combined.0$v211>8]<-NA
combined.0$v211[combined.0$v211>2]<-3
combined.0$race<-combined.0$v211

#employment status
combined.0$empstat<-recode(combined.0$v209,'1:2=1;3:4=2;5:8=3;9=NA')

#home ownership
combined.0$ownership<-recode(combined.0$cc333,'1=0;2=1;3:4=0;8=NA')

#family income
combined.0$inc14<-combined.0$v246
combined.0$inc14[combined.0$inc14>14]<-NA

#religion variables
#all not falling into one of these categories are treated as the reference group 
#e.g., those not identifying with a common U.S. religion are the reference
combined.0$catholic<-as.numeric(combined.0$v219==2)
combined.0$mormon<-as.numeric(combined.0$v219==3)
combined.0$orthodox<-as.numeric(combined.0$v219==4)
combined.0$jewish<-as.numeric(combined.0$v219==5)
combined.0$islam<-as.numeric(combined.0$v219==6)
combined.0$mainline<-as.numeric( (combined.0$v219==1 & combined.0$v220%in%c(7,8,9)) | (combined.0$v219==1 & combined.0$v220==11 & combined.0$v232%in%c(1,90)) | (combined.0$v219==1 & combined.0$v220==1 & combined.0$v222%in%c(2,3,4)) | (combined.0$v219==1 & combined.0$v220==2 & combined.0$v223%in%c(1,3,5,90)) | (combined.0$v219==1 & combined.0$v220==4 & combined.0$v225%in%c(1,4)) | (combined.0$v219==1 & combined.0$v220==5 & combined.0$v226%in%c(1,90)) )
combined.0$evangelical<-as.numeric( (combined.0$v219==1 & combined.0$v220%in%c(3,6,10,12))   | (combined.0$v219==1 & combined.0$v220==11 & combined.0$v232==2) | (combined.0$v219==1 & combined.0$v220==1 & combined.0$v222%in%c(1,5,6,7,8,9,10,90)) | (combined.0$v219==1 & combined.0$v220==2 & combined.0$v223%in%c(2,4)) | (combined.0$v219==1 & combined.0$v220==4 & combined.0$v225%in%c(2,3)) | (combined.0$v219==1 & combined.0$v220==5 & combined.0$v226%in%c(2,3,4,5,6)) )

#congressional district
combined.0$cd<-as.numeric(combined.0$v264)
combined.0$fipsCD<-100*combined.0$v259+combined.0$cd

#weight
combined.0$weight<-combined.0$v200

#subset to relevant variables and states; drop cases where ZIP state and listed state do not link up
combined.1<-subset(combined.0, select=c(state,zip,latitude,longitude,eastings,northings,ideology,age,female,educ,race,empstat,ownership,inc14,catholic,mormon,orthodox,jewish,islam,mainline,evangelical,FIPS.Code,rural,radius,area,weight,cd,fipsCD),subset=state==state.2)#state!="AK" & state!="HI" & 

combined.2<-na.omit(combined.1)
dim(combined.2)
table(combined.2$state)[-c(1,12,40)]
min(table(combined.2$state)[-c(1,12,40)])

#jitter eastings and northings
combined.2$eastCent<-combined.2$eastings
combined.2$northCent<-combined.2$northings
combined.2$eastings<-NA
combined.2$northings<-NA

for(i in 1:dim(combined.2)[1]){
	angle<-runif(1,min=0,max=2*pi)
	length<-runif(1,min=0,max=combined.2$radius[i])
	combined.2$eastings[i]<-combined.2$eastCent[i]+length*cos(angle)
	combined.2$northings[i]<-combined.2$northCent[i]+length*sin(angle)
	}
summary(combined.2)
#plot(y=combined.0$northings,x=combined.0$eastings,pch='.')
#plot(y=combined.1$northings,x=combined.1$eastings,pch='.')
#plot(y=combined.2$northings,x=combined.2$eastings,pch='.')

#drop each form of the data into a Stata file: with and without AK & HI
write.dta(combined.2, 'cces08reformattedFine.dta')
combined.3<-subset(combined.2,subset=state!="AK" & state!="HI")
write.dta(combined.3, 'cces08reformattedFine48.dta')

